

* Start from an empty session.
clear all

* ---------------------------------------------------------------
* Production by country and sector, built from the trade data
* (used instead of the SEA data to avoid the currency conversion
* problem there). Original author's note: values would be in
* million dollars.
* ---------------------------------------------------------------
use gravity_wiod, clear

* add up trade over everything except exporter and sector, storing the
* sum directly under the name "production"
collapse (sum) production = trade, by(exporter sector)
rename exporter country

* country-wide total over all sectors, kept alongside the sector rows as a check
egen production_total = sum(production), by(country)

save "production_totals.dta", replace

* ---------------------------------------------------------------
* Absorption by country and sector, built from the trade data
* (used instead of the SEA data to avoid the currency conversion
* problem there). Original author's note: values would be in
* million dollars.
* ---------------------------------------------------------------
use gravity_wiod, clear

* add up trade over everything except importer and sector, storing the
* sum directly under the name "absorption"
collapse (sum) absorption = trade, by(importer sector)
rename importer country

* written with saveold (older .dta format), unlike the production file
saveold "absorption_totals.dta", replace



 
* LOADING THE SOCIO-ECONOMIC ACCOUNT DATA FROM WIOD: 
 
/*
use SEA_all, clear
drop description
reshape long _, i(variable country sector code) j(year)
rename _ value
rename value v_
reshape wide v_, i(country sector code year) j(variable) string
sort year country sector
save SEA_wide, replace
*/

* Note: issues with the variable "description"


* description of variables in WIOD

/* 
Values	Description
GO	Gross output by industry at current basic prices (in millions of national currency)
II	Intermediate inputs at current purchasers' prices (in millions of national currency)
VA	Gross value added at current basic prices (in millions of national currency)
COMP	Compensation of employees (in millions of national currency)
LAB	Labour compensation (in millions of national currency)
CAP	Capital compensation (in millions of national currency)
GFCF	Nominal gross fixed capital formation (in millions of national currency)
EMP	Number of persons engaged (thousands)
EMPE	Number of employees (thousands)
H_EMP	Total hours worked by persons engaged (millions)
H_EMPE	Total hours worked by employees (millions)
	
Prices	
GO_P	Price levels gross output, 1995=100
II_P	Price levels of intermediate inputs, 1995=100
VA_P	Price levels of gross value added, 1995=100
GFCF_P	Price levels of gross fixed capital formation, 1995=100
	
Volumes	
GO_QI	Gross output, volume indices, 1995 = 100
II_QI	Intermediate inputs, volume indices, 1995 = 100
VA_QI	Gross value added, volume indices, 1995 = 100
K_GFCF	Real fixed capital stock, 1995 prices
	
Additional variables	
LABHS	High-skilled labour compensation  (share in total labour compensation)
LABMS	Medium-skilled labour compensation  (share in total labour compensation)
LABLS	Low-skilled labour compensation  (share in total labour compensation)
H_HS	Hours worked by high-skilled persons engaged (share in total hours)
H_MS	Hours worked by medium-skilled persons engaged (share in total hours)
H_LS	Hours worked by low-skilled persons engaged (share in total hours)
*/ 




*********************************
* START HERE -- for Skill premium paper


set more off
use SEA_wide, clear

* (optional single-year restriction, currently disabled)
*keep if year == 2005

* Restrict to the labour, hours, capital and output series plus identifiers.
* v_LAB* picks up v_LAB together with the skill-share series v_LABHS/MS/LS.
keep v_EMP v_GFCF v_CAP v_LAB*                 ///
     v_H_HS v_H_MS v_H_LS v_H_EMP v_H_EMPE     ///
     v_GO v_VA                                 ///
     country sector code year

* Jan 2019: some data seem to be missing for some years.
* Count the observations without high-skill hours shares, year by year.
bysort year: count if v_H_HS == .
* result noted by the original author: 2010 and 2011 are missing

* sector 0 is excluded from everything that follows
drop if sector == 0

* Need to download the July 2014 version of the data!
* Note: even that release does not include skill data past 2009.
* List the countries that have observations without labour compensation.
tabulate country if v_LAB == .

* Bring in population and per-capita income data.
* The using file is keyed on "r", so the country identifier is renamed
* for the merge and restored afterwards; no _merge variable is kept.
rename country r
merge m:1 r using pci_and_pop.dta, nogenerate
rename r country

rename log_pctotalfd logpci

* ===============================================================
* Backing out the skill premium (sector level)
* ===============================================================

* --- sanity checks on implied hourly wages ---
* average wage: labour compensation per hour worked by persons engaged
generate wage = v_LAB / v_H_EMP
* low-skill wage: low-skill share of compensation over low-skill share of hours
generate wage_unskill = (v_LAB*v_LABLS) / (v_H_EMP * v_H_LS)

* units: millions of national currency / millions of hours
*        = national currency per hour
bysort country: summarize wage
bysort country: summarize wage_unskill
* original author's verdict: seems to make sense
summarize wage wage_unskill

* --- skill ratio based on the wage bill ---
* (high + medium skill compensation share) relative to the low-skill share
generate skillratio = (v_LABHS + v_LABMS)/ v_LABLS
generate logskillratio = log(skillratio)

*hist skillratio

bysort sector: summarize skillratio

* --- skill premium by sector ---
* wage = factor payment / factor usage (hours), so the relative wage is the
* wage-bill ratio multiplied by the inverse ratio of hours shares
generate skillprem = (skillratio * v_H_LS) / (v_H_HS + v_H_MS)
summarize skillprem

generate logskillprem = log(skillprem)

* ===============================================================
* Weighted average skill premium by country and year
* ===============================================================

* Country-year totals. Each skill share is multiplied by its sector total
* (labour compensation, or hours worked) before summing over sectors.
foreach grp in HS MS LS {
    egen v_LAB`grp'_tot = sum(v_LAB`grp' * v_LAB) , by(country year)
}

foreach grp in LS MS HS {
    egen v_H_`grp'_tot = sum(v_H_`grp' * v_H_EMP) , by(country year)
}

* wage-bill ratio: (high + medium) over low
generate skillratio_tot = (v_LABHS_tot + v_LABMS_tot)/ v_LABLS_tot

* relative wage wage_h/wage_l, with medium skill counted as skilled
generate skillprem_tot = (skillratio_tot * v_H_LS_tot) / (v_H_HS_tot + v_H_MS_tot)

* equivalent formulation, left here for reference:
*gen skillprem_tot = (v_LABHS_tot + v_LABMS_tot)/( v_H_HS_tot + v_H_MS_tot) * ((v_LABLS_tot ) /( v_H_LS_tot ) )^(-1)

* alternative grouping: medium skill pooled with the unskilled
generate skillprem_tot_M = (v_LABHS_tot )/( v_H_HS_tot ) * ((v_LABLS_tot + v_LABMS_tot ) /( v_H_LS_tot + v_H_MS_tot ) )^(-1)

* compare the two definitions
correlate skillprem_tot skillprem_tot_M

scatter skillprem_tot skillprem_tot_M if year == 2005, mlabel(country)


* ===============================================================
* Ratio of the 2009 skill premium to the 1995 skill premium
* ===============================================================

* Step 1: the 2009 value, filled in on 2009 rows only.
* Step 2: spread that value to every row of the country (mean over the
*         non-missing 2009 rows).
* Step 3: divide by the current-row premium, on 1995 rows only, so the
*         ratio is non-missing only in 1995.
generate skillprem_09 = skillprem_tot if year == 2009
egen skillprem_09_ = mean(skillprem_09), by(country)
generate skillprem_0995_ratio = skillprem_09_ / skillprem_tot  if year == 1995

* Same three steps for the definition that pools medium with low skill.
generate skillprem_09_M = skillprem_tot_M if year == 2009
egen skillprem_09__M = mean(skillprem_09_M), by(country)
generate skillprem_0995_ratio_M = skillprem_09__M / skillprem_tot_M  if year == 1995



* ===============================================================
* Export the country-level skill premium ratios
* ===============================================================

* The country-level variables are identical across sectors, so one sector
* row per country-year is enough (rows with missing sector are removed too).
keep if sector == 1

rename country region

* Variables to export. skill_use_ratio_cons_out_0995chg is not created
* anywhere in this script, so an unconditional keep would stop with
* "variable not found" unless one of the input files supplies it.
* It is therefore kept only when it actually exists in memory.
local exportvars region skillprem_0995_ratio skillprem_0995_ratio_M
capture confirm variable skill_use_ratio_cons_out_0995chg
if _rc == 0 {
    local exportvars `exportvars' skill_use_ratio_cons_out_0995chg
}
else {
    display as text "note: skill_use_ratio_cons_out_0995chg not found; exporting without it"
}
keep `exportvars'

* the ratio is only defined on 1995 rows, which leaves one row per country
drop if skillprem_0995_ratio ==.

* recode Romania's code from ROU to ROM
replace region ="ROM" if region =="ROU"

save "skill_premium_wiod.dta", replace





